Prep data

Load necessary packages

# Point R at the project folder so the relative CSV path used below resolves.
# NOTE(review): this path is machine-specific; consider an RStudio project or
# here::here() so the script runs on other machines.
setwd("~/Desktop/working-with-lyle/Formality_Project")

# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it; library() below does the attaching.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
library(pacman)

# p_load() attaches every listed package and installs any that are missing.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
pacman::p_load(
  tidyverse, rlang, zoo, lubridate, plotrix, ggpubr, caret, broom,
  kableExtra, reactable,
  install = TRUE
)

Define aesthetics for graphs

# Three-colour hex palettes available to the plots in this report.
palette_map <- c("#3B9AB2", "#EBCC2A", "#F21A00")
palette_condition <- c("#ee9b00", "#bb3e03", "#005f73")

# Shared ggplot2 theme added to every figure: theme_classic() as the base,
# legend on top, 16-pt "Futura Medium" text, black axis text and lines, and
# no tick marks on the y axis.
plot_aes <- theme_classic() +
  theme(
    text = element_text(size = 16, family = "Futura Medium"),
    legend.position = "top",
    legend.text = element_text(size = 12),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(color = "black"),
    axis.ticks.y = element_blank()
  )

Define Table Functions

 # Render a fitted model's coefficient table as a styled kable.
 #
 # model_data: a fitted model object accepted by tidy() (lm fits are passed
 #   in this report).
 # Returns the result of kableExtra::kable_styling(), with the std.error,
 #   statistic and p.value columns renamed to SE, t and p.
 table_model <- function(model_data) {
   coefficient_table <- tidy(model_data)
   coefficient_table <- rename(
     coefficient_table,
     "SE" = std.error,
     "t" = statistic,
     "p" = p.value
   )
   kableExtra::kable_styling(kable(coefficient_table))
 }

Load data

# Read the cleaned corpus; the path is relative to the working directory.
df <- read_csv(file = "Atlantic_Cleaned_all_vars.csv")

Tidy the data

 # Measures that are averaged within each year.
 summary_vars <- c(
   "Analytic", "WPS", "BigWords", "Period", "readability", "grade_level"
 )

 # Yearly mean and standard error (plotrix::std.error) of every measure.
 # across() replaces the superseded mutate_at()/summarise_at() verbs and the
 # deprecated funs() helper. The named list keeps the "<var>_mean" and
 # "<var>_std.error" column names that the plots and models below rely on.
 tidy_df <- df %>%
   mutate(across(all_of(summary_vars), as.numeric)) %>%
   group_by(Date) %>% # Date holds the publication year
   summarise(
     across(all_of(summary_vars), list(mean = mean, std.error = std.error)),
     .groups = "drop"
   )

# Create centered variables by subtracting a fixed reference value from each
# yearly mean.
# NOTE(review): the constants are hard-coded; the report text says the data
# are "centered on 1857", so they are presumably the 1857 yearly means --
# confirm, and consider deriving them from tidy_df instead.
tidy_df <- tidy_df %>%
  mutate(
    Analytic_centered = Analytic_mean - 85.94,
    WPS_centered = WPS_mean - 37.14,
    BigWords_centered = BigWords_mean - 25.68,
    Period_centered = Period_mean - 4.589,
    readability_centered = readability_mean - 57.45,
    grade_level_centered = grade_level_mean - 10.71
  )

Flesch-Kincaid Description

Flesch Reading Ease (labelled "Ease of Readability" in the graphs below): higher scores indicate text that is easier to read; lower scores indicate text that is more difficult to read.

Flesch-Kincaid Grade Level: expresses readability as a U.S. school grade level, making it easier for teachers, parents, librarians, and others to judge the readability level of various books and texts.

Corpus Summary Stats

Dates

# Earliest and latest value of the Date column.
range(select(df, Date))
## [1] 1857 2022

Raw count of Articles

# Number of rows in the corpus, shown as a one-cell striped table.
# NOTE(review): this counts rows, not distinct Filename values -- confirm
# that each row is one article.
reactable::reactable(
  dplyr::summarize(select(df, Filename), n = n()),
  striped = TRUE
)

Number of Articles per Year

# Count the distinct Filename/Date pairs within each Date and keep the
# resulting striped reactable widget (not the counts themselves).
articles_year <- df %>%
  distinct(Filename, Date) %>%
  count(Date, name = "n") %>%
  reactable::reactable(striped = TRUE)

# Display the widget.
articles_year

Flesch-Kincaid Graphs

Please see attached files for the graphs if needed.

Plot the Smoothed Data

#Plot our smoothed data 

# We use the non-aggregated data (df) here to capture variation among individual observations

#readability

# Scatter of readability against Date on the non-aggregated data, with a
# LOESS smoother (span 0.50). The single theme() call carries the net effect
# of the title and axis styling.
readability_smooth <- ggplot(
  data = df,
  mapping = aes(x = Date, y = readability, group = 1)
) +
  ggtitle("Readability") +
  geom_point(color = "dodgerblue3", alpha = 0.5) +
  geom_smooth(method = "loess", span = 0.50) +
  plot_aes +
  labs(x = "Year", y = "Ease of Readability") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # Hard-coded Date slope from the raw-data regression (Readability_RAW).
  # The padding inside the label is kept exactly as it was written.
  annotate(
    geom = "text",
    x = 1855,
    y = 25,
    label = paste0(
      "\n",
      "             estimate = -0.1119 \n",
      "             p-value < 0.001\n",
      "           \n",
      "           "
    ),
    size = 3.5
  )

#grade level
# Scatter of grade level against Date on the non-aggregated data, with a
# LOESS smoother (span 0.60). The single theme() call carries the net effect
# of the title and axis styling.
grade_level_smooth <- ggplot(
  data = df,
  mapping = aes(x = Date, y = grade_level, group = 1)
) +
  ggtitle("Reading Grade Level") +
  geom_point(color = "dodgerblue3", alpha = 0.5) +
  geom_smooth(method = "loess", span = 0.60) +
  plot_aes +
  labs(x = "Year", y = "Reading Grade Level") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # This plot uses the raw data, so it reports the Date slope of the raw-data
  # regression (Grade_RAW, 0.0176). The label previously showed 0.0138, which
  # is the slope of the yearly-mean model (Grade_TIDY).
  annotate(
    geom = "text",
    x = 1855,
    y = 22,
    label = paste0(
      "\n",
      "             estimate = 0.0176  \n",
      "             p-value < 0.001\n",
      "           \n",
      "           "
    ),
    size = 3.5
  )

# Stack the two raw-data smooth plots vertically and add a title and note.
smooth_graphs <- ggpubr::ggarrange(
  readability_smooth, grade_level_smooth,
  ncol = 1, nrow = 2, common.legend = TRUE, legend = "bottom"
)

# geom_smooth() draws a 95% confidence band by default (se = TRUE,
# level = 0.95), so the note describes it as such rather than as a
# standard error.
annotate_figure(
  smooth_graphs,
  top = text_grob(
    "Smooth Flesch-Kincaid Graphs",
    color = "black", face = "bold", size = 20
  ),
  bottom = text_grob(
    "Note. Shaded bands represent 95% confidence intervals.",
    color = "Black", hjust = 0.9, x = 1, face = "italic", size = 12
  )
)

Plotting the smoothed data by year

# Yearly mean readability against Date with a LOESS smoother (span 0.60).
# The single theme() call carries the net effect of the title and axis
# styling.
readability_smooth_tidy <- ggplot(
  data = tidy_df,
  mapping = aes(x = Date, y = readability_mean, group = 1)
) +
  ggtitle("Readability") +
  geom_point(color = "dodgerblue3", alpha = 0.7) +
  geom_smooth(method = "loess", span = 0.60) +
  plot_aes +
  labs(x = "Year", y = "Ease of Readability") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # Hard-coded Date slope from the yearly-mean regression (Readability_TIDY).
  # The padding inside the label is kept exactly as it was written.
  annotate(
    geom = "text",
    x = 1855,
    y = 60,
    label = paste0(
      "\n",
      "             estimate = -0.079  \n",
      "             p-value < 0.001\n",
      "           \n",
      "           "
    ),
    size = 3.5
  )

# Yearly mean grade level against Date with a LOESS smoother (span 0.80).
# The single theme() call carries the net effect of the title and axis
# styling.
grade_smooth_tidy <- ggplot(
  data = tidy_df,
  mapping = aes(x = Date, y = grade_level_mean, group = 1)
) +
  ggtitle("Grade Level") +
  geom_point(color = "dodgerblue3", alpha = 0.7) +
  geom_smooth(method = "loess", span = 0.80) +
  plot_aes +
  labs(x = "Year", y = "Grade Level Score") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # Hard-coded Date slope from the yearly-mean regression (Grade_TIDY).
  # The padding inside the label is kept exactly as it was written.
  annotate(
    geom = "text",
    x = 1855,
    y = 12,
    label = paste0(
      "\n",
      "             estimate = 0.0138  \n",
      "             p-value < 0.001\n",
      "           \n",
      "           "
    ),
    size = 3.5
  )
# Stack the two yearly-mean smooth plots vertically and add a title and note.
tidy_smooth_graphs <- ggpubr::ggarrange(
  readability_smooth_tidy, grade_smooth_tidy,
  ncol = 1, nrow = 2, common.legend = TRUE, legend = "bottom"
)

# geom_smooth() draws a 95% confidence band by default (se = TRUE,
# level = 0.95), so the note describes it as such. The parenthesis in the
# second sentence of the note is now closed.
annotate_figure(
  tidy_smooth_graphs,
  top = text_grob(
    "Smoothed Graphs (grouped by year)",
    color = "black", face = "bold", size = 20
  ),
  bottom = text_grob(
    paste0(
      "Note. Shaded bands represent 95% confidence intervals.\n",
      "Estimates are from mean-centered analyses ",
      "(data centered on means for 1857)."
    ),
    color = "Black", hjust = 1, x = 1, face = "italic", size = 12
  )
)

Raw Data by Year

# Yearly mean readability as a line, with a ribbon spanning one standard
# error either side of the mean. The title and y-axis label previously read
# "Readbility". The single theme() call carries the net effect of the title
# and axis styling.
Readability <- ggplot(
  data = tidy_df,
  mapping = aes(x = Date, y = readability_mean, group = 1)
) +
  geom_line(colour = "dodgerblue3") +
  geom_ribbon(
    aes(
      ymin = readability_mean - readability_std.error,
      ymax = readability_mean + readability_std.error
    ),
    alpha = 0.2
  ) +
  ggtitle("Readability") +
  plot_aes +
  labs(x = "Year", y = "Ease of Readability") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Yearly mean grade level as a line, with a ribbon spanning one standard
# error either side of the mean. The single theme() call carries the net
# effect of the title and axis styling.
grade_level <- ggplot(
  data = tidy_df,
  mapping = aes(x = Date, y = grade_level_mean, group = 1)
) +
  geom_line(colour = "dodgerblue3") +
  geom_ribbon(
    aes(
      ymin = grade_level_mean - grade_level_std.error,
      ymax = grade_level_mean + grade_level_std.error
    ),
    alpha = 0.2
  ) +
  ggtitle("Grade Level") +
  plot_aes +
  labs(x = "Year", y = "Flesch-Kincaid Grade Level") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20),
    axis.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

#raw graphs
# Stack the two yearly line plots vertically and add a title and note.
raw_graphs <- ggpubr::ggarrange(
  Readability, grade_level,
  ncol = 1, nrow = 2, common.legend = TRUE, legend = "bottom"
)

# The ribbons in these plots span mean +/- one standard error. The note
# previously ended with a stray newline, padding and ")".
annotate_figure(
  raw_graphs,
  top = text_grob(
    "Raw Flesch-Kincaid Graphs (grouped by year)",
    color = "black", face = "bold", size = 20
  ),
  bottom = text_grob(
    "Note. Shaded bands represent one standard error around the yearly mean.",
    color = "Black", hjust = 1.3, x = 1, face = "italic", size = 16
  )
)

Build Simple Regression Models

Ease of Readability

Models presented in order: raw data, aggregated by year, centered on 1857

#Raw Data
# Linear trend of readability over Date, fit three ways.

# 1. Non-aggregated data.
Readability_RAW <- lm(formula = readability ~ Date, data = df)

# 2. Yearly means.
Readability_TIDY <- lm(formula = readability_mean ~ Date, data = tidy_df)

# 3. Yearly means with a constant subtracted from the outcome.
# NOTE(review): Date itself is not centered, so this fit has the same slope
# as the yearly-mean model and only the intercept shifts -- confirm that is
# the intended "centered on 1857" analysis.
Readability_centered <- lm(
  formula = readability_centered ~ Date,
  data = tidy_df
)

# Coefficient table for the raw-data readability model.
Readability_RAW %>% table_model()
term estimate SE t p
(Intercept) 282.2207 9.6271 29.32 0
Date -0.1119 0.0049 -22.71 0
# Coefficient table for the yearly-mean readability model.
Readability_TIDY %>% table_model()
term estimate SE t p
(Intercept) 221.108 19.5255 11.324 0
Date -0.079 0.0101 -7.851 0
# Coefficient table for the centered readability model.
Readability_centered %>% table_model()
term estimate SE t p
(Intercept) 163.658 19.5255 8.382 0
Date -0.079 0.0101 -7.851 0

Grade Level Reading

Models presented in order: Raw data, aggregated by year, centered on 1857

#Raw Data
# Linear trend of grade level over Date, fit three ways.

# 1. Non-aggregated data.
Grade_RAW <- lm(formula = grade_level ~ Date, data = df)

# 2. Yearly means.
Grade_TIDY <- lm(formula = grade_level_mean ~ Date, data = tidy_df)

# 3. Yearly means with a constant subtracted from the outcome.
# NOTE(review): Date itself is not centered, so this fit has the same slope
# as the yearly-mean model and only the intercept shifts -- confirm that is
# the intended "centered on 1857" analysis.
Grade_centered <- lm(formula = grade_level_centered ~ Date, data = tidy_df)


# Coefficient table for the raw-data grade-level model.
Grade_RAW %>% table_model()
term estimate SE t p
(Intercept) -25.2049 2.0722 -12.16 0
Date 0.0176 0.0011 16.55 0
# Coefficient table for the yearly-mean grade-level model.
Grade_TIDY %>% table_model()
term estimate SE t p
(Intercept) -18.3108 3.7717 -4.855 0
Date 0.0138 0.0019 7.098 0
# Coefficient table for the centered grade-level model.
Grade_centered %>% table_model()
term estimate SE t p
(Intercept) -29.0208 3.7717 -7.694 0
Date 0.0138 0.0019 7.098 0